实验表明,gradient penalty能够显著提高训练速度,解决了原始WGAN收敛缓慢的问题
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.gridspec as gridspec
from glob import glob
import os
import helper
# Load MNIST through the TensorFlow tutorial reader (labels one-hot encoded).
mnist = input_data.read_data_sets('./MNIST_data', one_hot=True)

# Image folders globbed later in the notebook for the MNIST and CelebA runs.
mnist_datadir, celeba_datadir = './mnist', './img_align_celeba'

# Preview the first few training digits as a square grid of 28x28 images.
show_n_images = 16
mnist_images = np.reshape(
    mnist.train.images[:show_n_images],
    (show_n_images, 28, 28),
)
plt.imshow(helper.my_images_square_grid(mnist_images, 'L'), cmap='gray')
print(mnist_images.shape)
CelebFaces Attributes Dataset (CelebA) 是一个包含 20 多万张名人图片及相关图片说明的数据集。你将用此数据集生成人脸,不会用到相关说明。
# Preview the first `show_n_images` CelebA files as a square RGB grid.
# helper.get_batch is not visible here; presumably it loads each file as a
# 28x28 RGB array -- TODO confirm against helper.py.
# NOTE(review): the name `mnist_images` is reused for CelebA samples and
# overwrites the MNIST preview array bound earlier.
mnist_images = helper.get_batch(glob(os.path.join(celeba_datadir, '*.jpg'))[:show_n_images], 28, 28, 'RGB')
plt.imshow(helper.images_square_grid(mnist_images, 'RGB'))
部署 model_inputs 函数以创建用于神经网络的占位符 (TF Placeholders)。
创建以下占位符:
使用 image_width、image_height 和 image_channels 创建 rank 4 的真实输入图像占位符;使用 z_dim 创建 rank 2 的 z 输入占位符;再创建一个学习率占位符。以元组 (tensor of real input images, tensor of z data, learning rate) 的形式返回这些占位符。
def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Create the placeholders that feed the GAN graph.

    :param image_width: Second dimension of the real-image placeholder
    :param image_height: Third dimension of the real-image placeholder
    :param image_channels: Number of image channels
    :param z_dim: Size of the latent vector fed to the generator
    :return: Tuple of (real images placeholder, z placeholder,
             learning rate placeholder)
    """
    real_shape = (None, image_width, image_height, image_channels)
    real_ph = tf.placeholder(tf.float32, shape=real_shape, name='inputs_real')
    z_ph = tf.placeholder(tf.float32, shape=(None, z_dim), name='input_z')
    # The learning rate is left without a static shape.
    lr_ph = tf.placeholder(tf.float32, shape=None)
    return real_ph, z_ph, lr_ph
def LeakRelu(alpha, x):
    """
    Leaky ReLU: element-wise maximum of ``alpha * x`` and ``x``.

    :param alpha: Multiplier applied to ``x`` for the leaky branch
    :param x: Input tensor
    :return: Tensor holding ``max(alpha * x, x)`` element-wise
    """
    leaky_branch = alpha * x
    return tf.maximum(leaky_branch, x)
########### No layer_normalization API was found, so it is left out for now ##############
def discriminator(images, reuse = False):
    """
    Build the critic network: three stride-2 convolutions and a linear head.

    No batch normalisation or dropout is applied, and the final dense layer
    has no activation (the WGAN critic outputs an unbounded score).

    :param images: Rank-4 tensor of input images
    :param reuse: Whether to reuse the variables of the 'discriminator' scope
    :return: Tensor with one raw score per input image
    """
    leak = 0.2
    dense_init = tf.contrib.layers.xavier_initializer()
    conv_init = tf.contrib.layers.xavier_initializer_conv2d()

    with tf.variable_scope('discriminator', reuse=reuse):
        features = images
        # Each stage: 3x3 convolution with stride 2, then leaky ReLU.
        for n_filters in (64, 128, 256):
            features = tf.layers.conv2d(
                features, n_filters, 3,
                strides=2, padding='same',
                kernel_initializer=conv_init,
            )
            features = LeakRelu(leak, features)

        # Flatten everything except the batch dimension.
        height, width, depth = (int(dim) for dim in features.shape[1:])
        flat = tf.reshape(features, (-1, height * width * depth))

        # WGAN: no activation on the critic output.
        score = tf.layers.dense(flat, 1, kernel_initializer=dense_init)
    return score
部署 generator 函数以使用 z 生成图像。该函数应能够重复使用神经网络中的各种变量。
在 tf.variable_scope 中使用 "generator" 的变量空间名来重复使用该函数中的变量。
该函数应返回所生成的 28 x 28 x out_channel_dim 维度图像。
def generator(input_z, out_channel_dim, is_train = True):
    """
    Build the generator network that maps latent vectors to images.

    Variables live in the 'generator' scope; they are created when
    ``is_train`` is True and reused when it is False.

    :param input_z: Rank-2 tensor of latent vectors
    :param out_channel_dim: Number of channels of the generated images
    :param is_train: True while training (batch norm in training mode, new
                     variables); False for inference (variables reused)
    :return: Tensor of generated images, 28 x 28 x out_channel_dim, squashed
             to [-1, 1] by tanh
    """
    reuse = not is_train
    alpha = 0.2
    fc_weight_init = tf.contrib.layers.xavier_initializer()
    conv_weight_init = tf.contrib.layers.xavier_initializer_conv2d()

    with tf.variable_scope('generator', reuse=reuse):
        # Project z with a dense layer; it takes the fully-connected
        # initializer, matching how the discriminator sets up its dense layer.
        x1 = tf.layers.dense(input_z, 4 * 4 * 512, kernel_initializer=fc_weight_init)
        x1 = tf.reshape(x1, (-1, 4, 4, 512))
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = LeakRelu(alpha, x1)
        # 4x4x512

        # Kernel 4, stride 1, 'valid' padding grows 4x4 to 7x7.
        x2 = tf.layers.conv2d_transpose(x1, 256, 4, strides=1, padding='valid',
                                        kernel_initializer=conv_weight_init)
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = LeakRelu(alpha, x2)
        # 7x7x256

        x3 = tf.layers.conv2d_transpose(x2, 128, 3, strides=2, padding='same',
                                        kernel_initializer=conv_weight_init)
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = LeakRelu(alpha, x3)
        # 14x14x128

        logits = tf.layers.conv2d_transpose(x3, out_channel_dim, 3, strides=2, padding='same',
                                            kernel_initializer=conv_weight_init)
        # 28x28xout_channel_dim

        out = tf.tanh(logits)
    return out
部署 model_loss 函数训练并计算 GANs 的损失。该函数应返回形如 (discriminator loss, generator loss) 的元组。
def model_loss(input_real, input_z, image_channels, batch_size):
    """
    Build the WGAN-GP critic and generator losses.

    The critic loss is ``mean(D(fake)) - mean(D(real))`` plus ``LAMBDA`` times
    the gradient penalty; the generator loss is ``-mean(D(fake))``.
    ``LAMBDA`` is read from module scope when this function is called.

    :param input_real: Rank-4 tensor of real images
    :param input_z: Tensor of latent vectors
    :param image_channels: Number of channels the generator must output
    :param batch_size: Leading dimension of the random interpolation
                       coefficients; fed batches are expected to match it
    :return: A tuple of (discriminator loss, generator loss)
    """
    real_out = discriminator(input_real, reuse=False)
    fake_images = generator(input_z, image_channels, is_train=True)
    fake_out = discriminator(fake_images, reuse=True)

    d_real_loss = tf.reduce_mean(real_out)
    d_fake_loss = tf.reduce_mean(fake_out)
    d_loss = d_fake_loss - d_real_loss

    # Gradient penalty: score the critic on random points along the straight
    # lines between real and generated samples.
    alpha = tf.random_uniform(
        shape=[batch_size, 1, 1, 1],
        minval=0.,
        maxval=1.)
    differences = fake_images - input_real
    interpolates = input_real + (alpha * differences)
    gradients = tf.gradients(discriminator(interpolates, reuse=True), [interpolates])[0]
    # The gradient is rank 4 (batch, and three image axes), so the per-sample
    # L2 norm must sum over every non-batch axis, not only axis 1.
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
    gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
    d_loss += LAMBDA * gradient_penalty

    # d_fake_loss is already a scalar mean of the critic scores on fakes.
    g_loss = -d_fake_loss
    return d_loss, g_loss
def model_opt(d_loss, g_loss, learning_rate):
    """
    Create the RMSProp training operations for both networks.

    Each optimizer only updates the trainable variables whose names start
    with its own scope ('generator' or 'discriminator'), and runs after the
    UPDATE_OPS entries registered under that same scope.

    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning rate handed to both optimizers
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    def _in_scope(items, scope):
        # Keep the graph elements whose name begins with the scope name.
        return [item for item in items if item.name.startswith(scope)]

    trainable = tf.trainable_variables()
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    g_var = _in_scope(trainable, 'generator')
    d_var = _in_scope(trainable, 'discriminator')

    with tf.control_dependencies(_in_scope(pending_updates, 'generator')):
        g_optimizer = tf.train.RMSPropOptimizer(learning_rate)
        g_train_opt = g_optimizer.minimize(g_loss, var_list=g_var)

    with tf.control_dependencies(_in_scope(pending_updates, 'discriminator')):
        d_optimizer = tf.train.RMSPropOptimizer(learning_rate)
        d_train_opt = d_optimizer.minimize(d_loss, var_list=d_var)

    return d_train_opt, g_train_opt
import numpy as np
import matplotlib.pyplot as plt
def show_generator_output(sess, n_images, input_z, out_channel_dim, image_mode):
    """
    Sample the generator and plot the results as a square grid.

    :param sess: TensorFlow session used to run the generator
    :param n_images: Number of images to generate
    :param input_z: Input Z placeholder; its last dimension gives the latent size
    :param out_channel_dim: The number of channels in the output image
    :param image_mode: The mode to use for images ("RGB" or "L")
    """
    latent_size = input_z.get_shape().as_list()[-1]
    noise = np.random.uniform(-1, 1, size=[n_images, latent_size])

    # is_train=False makes the generator reuse its existing variables.
    preview_op = generator(input_z, out_channel_dim, False)
    generated = sess.run(preview_op, feed_dict={input_z: noise})

    grid = helper.images_square_grid(generated, image_mode)
    if image_mode == 'RGB':
        colour_map = None
    else:
        colour_map = 'gray'
    plt.imshow(grid, cmap=colour_map)
    plt.show()
def train(epoch_count, batch_size, z_dim, learning_rate, get_batches, data_shape, data_image_mode):
    """
    Build the GAN graph and train it.

    :param epoch_count: Number of epochs
    :param batch_size: Batch Size
    :param z_dim: Z dimension
    :param learning_rate: Learning rate value fed to the optimizers each step
    :param get_batches: Function to get batches; called with the batch size
    :param data_shape: Shape of the data; entries 1, 2 and 3 are passed to
                       model_inputs as width, height and channels
    :param data_image_mode: The image mode to use for images ("RGB" or "L")
    """
    inputs_real, inputs_z, lr = model_inputs(data_shape[1], data_shape[2], data_shape[3], z_dim)
    d_loss, g_loss = model_loss(inputs_real, inputs_z, data_shape[-1], batch_size)
    # Build the optimizers on the `lr` placeholder so the learning rate fed
    # on every step is the one the optimizers actually use.
    d_train_opt, g_train_opt = model_opt(d_loss, g_loss, lr)

    # NOTE(review): the generator is updated twice per critic update, whereas
    # WGAN-style training usually takes several critic steps per generator
    # step -- confirm this ratio is intentional.
    generator_updates = 2

    step = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epoch_count):
            for batch_images in get_batches(batch_size):
                step += 1
                # Presumably get_batches yields pixels in [-0.5, 0.5], so
                # doubling matches the generator's tanh range -- TODO confirm.
                batch_images = batch_images * 2

                # Sample random noise for G
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))

                # Run optimizers
                sess.run(d_train_opt,
                         feed_dict={inputs_real: batch_images, inputs_z: batch_z, lr: learning_rate})
                for _ in range(generator_updates):
                    sess.run(g_train_opt, feed_dict={inputs_z: batch_z, lr: learning_rate})

                if step % 200 == 0:
                    train_loss_d = d_loss.eval({inputs_z: batch_z, inputs_real: batch_images})
                    train_loss_g = g_loss.eval({inputs_z: batch_z})
                    print("Epoch {}/{} Step {}...".format(epoch_i+1, epoch_count, step),
                          "Discriminator Loss: {:.4f}...".format(train_loss_d),
                          "Generator Loss: {:.4f}".format(train_loss_g))

                if step % 400 == 0:
                    show_generator_output(sess, 16, inputs_z, data_shape[3], data_image_mode)
# Hyperparameters for the MNIST run.
batch_size, z_dim = 32, 100
learning_rate = 0.003
LAMBDA = 12  # weight of the gradient-penalty term
epochs = 1

mnist_dataset = helper.Dataset('mnist', glob(os.path.join(mnist_datadir, '*.jpg')))

# Train inside a fresh graph so this run does not share ops with other runs.
with tf.Graph().as_default():
    train(
        epochs, batch_size, z_dim, learning_rate,
        mnist_dataset.get_batches,
        mnist_dataset.shape,
        mnist_dataset.image_mode,
    )
# Hyperparameters for the CelebA run.
batch_size, z_dim = 32, 100
learning_rate = 0.003
LAMBDA = 12  # weight of the gradient-penalty term
epochs = 10

celeba_dataset = helper.Dataset('celeba', glob(os.path.join(celeba_datadir, '*.jpg')))

# Train inside a fresh graph so this run does not share ops with other runs.
with tf.Graph().as_default():
    train(
        epochs, batch_size, z_dim, learning_rate,
        celeba_dataset.get_batches,
        celeba_dataset.shape,
        celeba_dataset.image_mode,
    )
# Second MNIST run, using the same hyperparameters as the first one.
batch_size, z_dim = 32, 100
learning_rate = 0.003
LAMBDA = 12  # weight of the gradient-penalty term
epochs = 1

mnist_dataset = helper.Dataset('mnist', glob(os.path.join(mnist_datadir, '*.jpg')))

# Train inside a fresh graph so this run does not share ops with other runs.
with tf.Graph().as_default():
    train(
        epochs, batch_size, z_dim, learning_rate,
        mnist_dataset.get_batches,
        mnist_dataset.shape,
        mnist_dataset.image_mode,
    )